---
title: "Maternal Mortality in New York"
output: rmarkdown::github_document
---
library(tidyverse)
## Loading tidyverse: ggplot2
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: readr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr
## Conflicts with tidy packages ----------------------------------------------
## filter(): dplyr, stats
## lag(): dplyr, stats
library(tidycensus)
library(dplyr)
library(janitor)
library(ggplot2)
library(tmap)
library(tmaptools)
library(sf)
## Linking to GEOS 3.4.2, GDAL 2.1.2, proj.4 4.9.1
library(leaflet)
library(plyr)
## -------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## -------------------------------------------------------------------------
##
## Attaching package: 'plyr'
## The following objects are masked from 'package:dplyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
## The following object is masked from 'package:purrr':
##
## compact
# Load the 2013 table of live births and maternal deaths keyed by boro_name.
maternal_deaths <- read_csv("/Users/cecilialow-weiner/Desktop/Data Incubator Project /2013_maternal_mortality_rate.csv")
## Parsed with column specification:
## cols(
## boro_name = col_character(),
## Live_Births = col_integer(),
## Maternal_deaths = col_integer()
## )
# Maternal deaths per 100,000 live births, computed row by row.
# dplyr::mutate() is namespaced explicitly because plyr is attached after
# dplyr in this file and masks mutate(). The ratio is element-wise, so no
# group_by() is needed and the result is left ungrouped.
maternal_mortality_rate <- maternal_deaths %>%
  dplyr::mutate(rate = (Maternal_deaths / Live_Births) * 100000)
# Scatter plot of maternal deaths against live births, coloured by borough.
ggplot(
  maternal_mortality_rate,
  aes(x = Live_Births, y = Maternal_deaths, color = boro_name)
) +
  geom_point()
# Bar chart of the maternal mortality rate for each borough.
# `rate` is expressed as deaths per 100,000 live births, not as a proportion,
# so the y axis keeps plain numeric labels: percent formatting would multiply
# every value by 100 and append "%". The unit is stated in the axis title.
ggplot(
  data = maternal_mortality_rate,
  aes(x = boro_name, y = rate, fill = boro_name)
) +
  geom_bar(stat = "identity") +
  # name = NULL removes the legend title; element_blank() is a theme element
  # and is not a valid scale name.
  scale_fill_brewer(palette = "Set2", name = NULL) +
  labs(
    x = "Borough Name",
    y = "Maternal Mortality Rate (per 100,000 live births)",
    title = "Maternal Mortality Rate by Borough"
  )
# Path to the borough boundaries shapefile.
borough <- "/Users/cecilialow-weiner/Downloads/Borough Boundaries/geo_export_838ba8d5-de6d-49a7-a7a6-681c253a5cf5.shp"
# Read the boundaries as an sf object. sf::st_read() replaces tmaptools'
# read_shape(), which is deprecated and missing from later tmaptools releases;
# sf is already attached by this script. quiet = TRUE suppresses the driver
# summary that st_read() would otherwise print.
nyc_boroughs <- st_read(borough, quiet = TRUE)
# Quick thematic map of the borough polygons.
qtm(nyc_boroughs)
# Attach the mortality figures to the borough polygons, matching on boro_name.
merged_borough <- merge(
  x = nyc_boroughs,
  y = maternal_mortality_rate,
  by = "boro_name"
)
# Choropleth of `rate` split into five Jenks classes.
tm_shape(shp = merged_borough) +
  tm_fill(col = "rate", n = 5, style = "jenks")
# Select interactive view mode explicitly. ttm() only toggles the current
# mode, so its effect depends on whatever mode the session was in before.
tmap_mode("view")
## tmap mode set to interactive viewing
# Styled choropleth of the borough maternal mortality rate.
tm_shape(merged_borough) +
  tm_fill(
    col = "rate",
    palette = "GnBu",
    contrast = c(0.2, 0.8),
    n = 5,
    style = "jenks",
    # `rate` is deaths per 100,000 live births, so the unit goes in the title
    # and the legend labels carry no "%" suffix.
    title = "Maternal Mortality Rate (per 100,000 live births)",
    legend.format = list(
      fun = function(x) {
        formatC(x, digits = 1, format = "f")
      }
    ),
    popup.vars = c(
      "Maternal Mortality Rate" = "rate"
    ),
    # boro_name is the merge key, so it is guaranteed to be a column of
    # merged_borough. NOTE(review): the previous value "NAME" is not a column
    # of the borough CSV and is presumably absent from the borough shapefile
    # as well; confirm against the shapefile attributes.
    id = "boro_name",
    popup.format = list(rate = list(format = "f"))
  ) +
  tm_borders(col = "gray40", lwd = 1, lty = "solid", alpha = NA)
# Path to the New York county boundaries shapefile.
counties <- "/Users/cecilialow-weiner/Downloads/cugir-007865/cty036.shp"
# Read the boundaries as an sf object. sf::st_read() replaces tmaptools'
# read_shape(), which is deprecated and missing from later tmaptools releases;
# sf is already attached by this script. quiet = TRUE suppresses the driver
# summary that st_read() would otherwise print.
ny_counties <- st_read(counties, quiet = TRUE)
# Load the 2013 county table of live births and maternal deaths.
maternal_mortality_ny <- read_csv("/Users/cecilialow-weiner/Desktop/Data Incubator Project /Maternal_deaths_2013.csv")
## Parsed with column specification:
## cols(
## NAME = col_character(),
## Live_Births = col_integer(),
## Maternal_Mortality = col_integer(),
## COUNTYFP = col_character()
## )
# Maternal deaths per 100,000 live births per row, with COUNTYFP converted to
# a factor in the same step. dplyr::mutate() is namespaced explicitly because
# plyr is attached after dplyr in this file and masks mutate(). The ratio is
# element-wise, so no group_by() is needed and the result is left ungrouped.
maternal_mortality_rate_ny <- maternal_mortality_ny %>%
  dplyr::mutate(
    rate = (Maternal_Mortality / Live_Births) * 100000,
    COUNTYFP = as.factor(COUNTYFP)
  )
# Attach the mortality figures to the county polygons, matching on NAME.
merged_counties <- merge(ny_counties, maternal_mortality_rate_ny, by = "NAME")
# Choropleth of the county-level `rate` split into five Jenks classes.
tm_shape(shp = merged_counties) +
  tm_fill(col = "rate", n = 5, style = "jenks")
# Select static plot mode explicitly. ttm() only toggles the current mode, so
# its effect depends on whatever mode the session was in before.
tmap_mode("plot")
## tmap mode set to plotting
# Styled choropleth of the county maternal mortality rate.
tm_shape(merged_counties) +
  tm_fill(
    col = "rate",
    palette = "GnBu",
    contrast = c(0.2, 0.8),
    n = 5,
    style = "jenks",
    # `rate` is deaths per 100,000 live births, so the unit goes in the title
    # and the legend labels carry no "%" suffix.
    title = "Maternal Mortality Rate (per 100,000 live births)",
    legend.format = list(
      fun = function(x) {
        formatC(x, digits = 1, format = "f")
      }
    ),
    popup.vars = c(
      "Maternal Mortality Rate" = "rate"
    ),
    id = "NAME",
    popup.format = list(rate = list(format = "f"))
  ) +
  tm_borders(col = "gray40", lwd = 1, lty = "solid", alpha = NA)
# Load the international maternal mortality table (one column per year).
international_maternal_mortality <- read_csv("/Users/cecilialow-weiner/Desktop/Data Incubator Project /international_maternal_mortality_rates.csv")
## Warning: Missing column names filled in: 'X61' [61], 'X62' [62]
## Parsed with column specification:
## cols(
## .default = col_character(),
## `1990` = col_integer(),
## `1991` = col_integer(),
## `1992` = col_integer(),
## `1993` = col_integer(),
## `1994` = col_integer(),
## `1995` = col_integer(),
## `1996` = col_integer(),
## `1997` = col_integer(),
## `1998` = col_integer(),
## `1999` = col_integer(),
## `2000` = col_integer(),
## `2001` = col_integer(),
## `2002` = col_integer(),
## `2003` = col_integer(),
## `2004` = col_integer(),
## `2005` = col_integer(),
## `2006` = col_integer(),
## `2007` = col_integer(),
## `2008` = col_integer(),
## `2009` = col_integer()
## # ... with 8 more columns
## )
## See spec(...) for full column specifications.
# Normalise the column names with janitor's clean_names().
international_cleaned_names <- international_maternal_mortality %>%
  clean_names()
# Columns retained for the 2013 comparison subset.
international_cleaned <- c("country_name", "country_code", "x2013")
new_international_data <- international_cleaned_names[, international_cleaned]
# Rows of the cleaned table whose country_name is "United States".
united_states_1990_2015 <- filter(
  international_cleaned_names,
  country_name == "United States"
)
# Load the US maternal mortality series and normalise its column names.
us_mm_1990_2015 <- clean_names(
  read_csv("/Users/cecilialow-weiner/Desktop/Data Incubator Project /maternal_mortality_US_1990_2015.csv")
)
## Parsed with column specification:
## cols(
## Year = col_integer(),
## `Maternal Mortality Rate (per 100,000 live births)` = col_integer()
## )
# Line chart with point markers of the US rate by year.
ggplot(
  us_mm_1990_2015,
  aes(x = year, y = maternal_mortality_rate_per_100_000_live_births)
) +
  geom_line() +
  geom_point()
# Load the NYC maternal mortality series and normalise its column names.
MM_Rate_NYC_time <- clean_names(
  read_csv("/Users/cecilialow-weiner/Desktop/Data Incubator Project /maternal_mortality_NYC_2006_2015.csv")
)
## Parsed with column specification:
## cols(
## Year = col_integer(),
## `Maternal Mortality Rate (per 100,000 live births)` = col_double()
## )
# Line chart with point markers of the NYC rate by year. The x axis has one
# break per year from 2006 to 2015; the y axis runs from 10 to 40 in steps of 2.
ggplot(
  MM_Rate_NYC_time,
  aes(
    x = year,
    y = maternal_mortality_rate_per_100_000_live_births,
    group = 1
  )
) +
  geom_line() +
  geom_point() +
  scale_x_continuous(
    name = "Year",
    limits = c(2006, 2015),
    breaks = seq(2006, 2015, by = 1)
  ) +
  scale_y_continuous(
    name = "Maternal Mortality Rate",
    limits = c(10, 40),
    breaks = seq(10, 40, by = 2)
  )